# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from matplotlib_examples.items import MatplotlibExamplesItem


class ExamplesSpider(CrawlSpider):
    """Crawl the matplotlib examples index and emit one item per example page.

    Starting from the examples index, links inside the table-of-contents
    wrapper are followed. Every followed page (except ``index.html`` pages,
    which are excluded from extraction) is handed to ``parse_example``,
    which emits an item whose ``file_urls`` holds the link found on that page.
    """

    name = 'examples'
    allowed_domains = ['matplotlib.org']
    start_urls = ['https://matplotlib.org/examples/index.html']

    rules = (
        # Extract every URL inside the toctree wrapper that is not an index
        # page, request it, and pass the response to the callback.
        # follow=True keeps applying this rule to the fetched pages as well.
        Rule(
            LinkExtractor(
                restrict_css='div.toctree-wrapper.compound',
                # Dot escaped so only a literal "/index.html" suffix is denied.
                deny=r'/index\.html$',
            ),
            callback='parse_example',
            follow=True,
        ),
    )

    def parse_example(self, response):
        """Build an item pointing at the first external reference link.

        Args:
            response: The response for a crawled example page.

        Returns:
            A ``MatplotlibExamplesItem`` whose ``file_urls`` contains the
            absolute URL of the first ``a.reference.external`` link
            (presumably the example's source-code download; verify against
            the page markup), or ``None`` when the page has no such link.
        """
        href = response.css('a.reference.external::attr(href)').extract_first()
        if not href:
            # Without this guard urljoin(None) yields the page's own URL and
            # the HTML page itself would be queued as the "file" to download.
            self.logger.warning(
                'No external reference link found on %s', response.url
            )
            return None
        return MatplotlibExamplesItem(file_urls=[response.urljoin(href)])
